Step 2: Evaluate output
include_graphics("prsice_images/TERRE_PRSice_BARPLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_HIGH-RES_PLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_QUANTILES_PLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_BARPLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_HIGH-RES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_QUANTILES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_BARPLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_HIGH-RES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_QUANTILES_PLOT_2022-10-04.png")

Plotting PRSice Data on my own
library(ggnewscale)
prsice_male_meta <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.prsice")
ggplot(prsice_male_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
geom_point() +
scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
theme_minimal()

prsice_female_meta <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.prsice")
ggplot(prsice_female_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
geom_point() +
scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
theme_minimal()

prsice_meta <- fread("prsice_data/TERRE_PRSice.prsice")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
geom_point(data = prsice_female_meta, size = 1) +
scale_color_gradient(low = "lightpink4", high = "lightpink") +
labs(color = bquote("Female log"["10"] ~ "(P)")) +
new_scale_color() +
geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
scale_color_gradient(low = "lightblue4", high = "lightblue") +
labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Male -log"["10"] ~ "(P)")) +
theme_minimal() +
theme(legend.position = "top")

ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
geom_point(data = prsice_female_meta, size = 1) +
scale_color_gradient(low = "lightpink4", high = "lightpink",guide = guide_colorbar(order=3)) +
labs(color = bquote("Female -log"["10"] ~ "(P)")) +
new_scale_color() +
geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
scale_color_gradient(low = "lightblue4", high = "lightblue",guide = guide_colorbar(order=2)) +
labs(color = bquote("Male -log"["10"] ~ "(P)")) +
new_scale_color() +
geom_point(data = prsice_meta, size = 1, aes(color = -log10(P))) +
scale_color_gradient(low = "gray40", high = "gray80",guide = guide_colorbar(order=1)) +
labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Cross-sex -log"["10"] ~ "(P)")) +
theme_minimal() +
theme(legend.position = "top",legend.title = element_text(size=7))

Step 3 run linear model at different thresholds for SNP inclusion
This is updated to recently processed DNAm Data 20 outliers in DNAm removed:
load("/home1/NEURO/SHARE_DECIPHER/processed_DNAm_data/2022/TERRE_processed_2022/1-TERRE_RG_filtered.RData") #PD_RG_filtered
# Assign genotyping ID to data
original_covars <- fread("/home1/NEURO/SHARE_DECIPHER/terre_meta_master.csv")[, .(patient, IID = gsub("_PAE.*", "", IID))]
betas_combat <- minfi::getBeta(PD_RG_filtered)
colnames(betas_combat) <- original_covars$IID[match(colnames(betas_combat), original_covars$patient)]
betas_combat <- betas_combat[, colnames(betas_combat) %in% covariate$IID]
Let’s check how the data looks for the first 5 subjects:
ggplot(betas_combat[, 1:5] %>% as.data.table(keep.rownames = T) %>% melt(id.vars = "rn", value.name = "betas", variable.name = "subject"), aes(betas, color = subject))+
geom_density()

Match DNA, PRS, and metadata @TODO fix this after update of PRS etc.
prsice_best <- fread("prsice_data/TERRE_PRSice.best")[match(colnames(betas_combat), IID,nomatch=0),.(best=PRS)]
prsice_all <- cbind(
fread("prsice_data/TERRE_PRSice.all_score")[match(colnames(betas_combat), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
prsice_best
)
covariate <- covariate[match(colnames(betas_combat), IID)]
all(covariate$IID == colnames(betas_combat))
[1] TRUE
all(covariate$IID == prsice_all$IID)
[1] TRUE
covariate_male <- covariate[sex == 1] %>% select(-sex)
betas_male <- betas_combat[, covariate_male$IID]
prsice_male_best <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.best")[match(colnames(betas_male), IID,nomatch=0),.(best=PRS)]
prsice_male_all <- cbind(
fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.all_score")[match(colnames(betas_male), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
prsice_male_best
)
covariate_male <- covariate_male[match(colnames(betas_male), IID,nomatch=0)]
all(covariate_male$IID == colnames(betas_male))
[1] TRUE
all(covariate_male$IID == prsice_male_all$IID)
[1] TRUE
prsice_female_best <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.best")
covariate_female <- covariate[sex == 0] %>% select(-sex) %>% filter(IID %in% prsice_female_best$IID)
betas_female <- betas_combat[, covariate_female$IID]
prsice_female_best <- prsice_female_best[match(colnames(betas_female), IID,nomatch=0),.(best=PRS)]
prsice_female_all <- cbind(
fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.all_score")[match(colnames(betas_female), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
prsice_female_best
)
covariate_female <- covariate_female[match(colnames(betas_female), IID,nomatch=0)]
all(covariate_female$IID == colnames(betas_female))
[1] TRUE
all(covariate_female$IID == prsice_female_all$IID)
[1] TRUE
Run limma
mvalues <- lumi::beta2m(betas_combat)
prs_mat <- prsice_all[, -c(1, 2)]
cov_mat <- covariate[, -c(1, 2)]
mvalues_male <- lumi::beta2m(betas_male)
prs_mat_male <- prsice_male_all[, -c(1, 2)]
cov_mat_male <- covariate_male[, -c(1, 2)]
mvalues_female <- lumi::beta2m(betas_female)
prs_mat_female <- prsice_female_all[, -c(1, 2)]
cov_mat_female <- covariate_female[, -c(1, 2)]
registerDoParallel(ncol(prs_mat) / 4)
hits <- foreach(prs_thresh = colnames(prs_mat)) %dopar% {
design_prs <- model.matrix(~., data = cbind(prs_mat[, ..prs_thresh], cov_mat))
prs_fit <- lmFit(mvalues, design_prs)
prs_fit <- eBayes(prs_fit)
topTable(prs_fit, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues))
}
Plotting EWAS vs Threshold Experiment by Sex
to_plot <- rbind(
hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold] %>%
mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
hits_by_thresh_bonf_male[, .(hits = .N, Sex = "Male"), by = threshold] %>%
mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
hits_by_thresh_bonf_female[, .(hits = .N, Sex = "Female"), by = threshold] %>%
mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0"))
) %>% mutate(Sex = factor(Sex, levels = c("Cross-sex", "Male", "Female")))
plot_pos <- position_dodge(width = 1)
ggplot(to_plot, aes(threshold, hits, fill = Sex, label = hits)) +
geom_text(position = plot_pos, vjust = -0.25) +
geom_col(position = plot_pos) +
labs(x = "GWAS P Value Threshold", y = "EWAS Hits") +
scale_fill_manual(values = c("grey80", "lightblue", "lightpink")) +
theme_minimal()

hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold]
hits_by_thresh_bonf_male[, .(hits = .N), by = threshold]
hits_by_thresh_bonf_female[, .(hits = .N), by = threshold]
display_venn <- function(x, ...) {
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...)
grid.draw(venn_object)
}
display_venn(list(`Cross-sex` = hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID, Male = hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID, Female = hits_by_thresh_bonf_female[threshold == "Pt_5e-08"]$ID), fill = c("gray80", "lightblue", "lightpink"))
Loading required package: grid
Loading required package: futile.logger

male_ids <- hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID
cross_ids <- hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID
male_ids[!male_ids %in% cross_ids]
character(0)
get_full_fit <- function(prs_mat,cov_mat,mvalues){
top_design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
top_prs_fit <- lmFit(mvalues, top_design_prs)
top_prs_fit <- eBayes(top_prs_fit)
top_prs_hits <- topTable(top_prs_fit, coef = 2, adjust.method = "bonf", number = Inf, genelist = rownames(mvalues))
}
top_prs_hits <- get_full_fit(prs_mat,cov_mat,mvalues)
top_male_prs_hits <- get_full_fit(prs_mat_male, cov_mat_male, mvalues_male)
top_female_prs_hits <- get_full_fit(prs_mat_female, cov_mat_female, mvalues_female)
save(list=c("top_prs_hits","top_male_prs_hits","top_female_prs_hits"),file="prs_nalls_cross_w_sex_stratified.RData")
load("prs_nalls_cross_w_sex_stratified.RData")
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
as.data.frame() %>%
rownames_to_column(var = "name")
prs_annot <- data.table(top_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_male <- data.table(top_male_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_female<- data.table(top_female_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
plot_prs_hits <- function(prs_annot,label_color){
ggplot(prs_annot, aes(logFC, -log10(P.Value))) +
geom_point() +
geom_point(
data = subset(prs_annot, adj.P.Val < 0.05 & abs(logFC) > 0.03),
color = label_color,
mapping = aes(logFC, -log10(P.Value))
) +
geom_hline(
linetype = "dashed",
yintercept = min(-log10(prs_annot$P.Value[prs_annot$adj.P.Val < 0.05]))
) +
geom_vline(linetype = "dashed", xintercept = 0.03) +
geom_vline(linetype = "dashed", xintercept = -0.03) +
geom_text_repel(
data = prs_annot %>% filter(abs(logFC) > 0.03 & adj.P.Val < 0.05),
color = "dodgerblue",
mapping = aes(logFC, -log10(P.Value), label = ifelse(gene != "", gene, ID)),
size = 3,
max.overlaps = 20
) +
labs(y = bquote("log"[10] ~ "(P)"), x = quote(Delta ~ "M" ~ Methylation)) +
theme_minimal()
}
plot_prs_hits(prs_annot,"gray40")


plot_prs_hits(prs_annot_male,"lightblue")

plot_prs_hits(prs_annot_female,"pink")


go enrichment?
library(gprofiler2)
cross_sex <- unique(gsub(";.*","",manifest[manifest$name %in% hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID,]$UCSC_RefGene_Name))
background <- unique(gsub(";.*","",manifest$UCSC_RefGene_Name))
gost_res <- gost(query=cross_sex,custom_bg = background)
gostplot(gost_res)
gmirror(
prs_annot_male[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
prs_annot_female[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
annotate_snp = c(male_annot_cpg,female_annot_cpg),
tline = max(prs_annot_male[adj.P.Val<0.25]$P.Value),
bline = max(prs_annot_female[adj.P.Val<0.25]$P.Value),
highlight_p = c(max(prs_annot_male[adj.P.Val<0.25]$P.Value),max(prs_annot_female[adj.P.Val<0.25]$P.Value)),
toptitle="Male",
bottomtitle ="Female",
highlighter="green",
background="white"
)
[1] "Saving plot to gmirror.png"
TableGrob (2 x 1) "arrange": 2 grobs

Manhattan plot vs GWAS manhattan plot
library(qqman)
copy_annot <- prs_annot[cpg_pos, on = c(ID = "name")] %>%
mutate(chr = as.numeric(recode(gsub("chr", "", chr),X="23",Y="24")))
to_plot <- copy_annot[, .(SNP = gene, CHR = chr, BP = pos, P = P.Value, FDR = adj.P.Val)][!is.na(P)]
manhattan(to_plot,
annotatePval = max(to_plot[FDR < 0.05]$P),
annotateTop = TRUE,
genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
suggestiveline = FALSE
)
manhattan(to_plot[CHR == 17],
annotatePval = max(to_plot[FDR < 0.05]$P),
annotateTop = FALSE,
genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
suggestiveline = FALSE
)
qq(to_plot$P)
DMRs
library(DMRcate)
Loading required package: minfi
Loading required package: BiocGenerics
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply,
parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB
The following object is masked from ‘package:limma’:
plotMA
The following objects are masked from ‘package:dplyr’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq,
Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union,
unique, unsplit, which, which.max, which.min
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:data.table’:
first, second
The following objects are masked from ‘package:dplyr’:
first, rename
The following object is masked from ‘package:tidyr’:
expand
The following object is masked from ‘package:base’:
expand.grid
Loading required package: IRanges
Attaching package: ‘IRanges’
The following object is masked from ‘package:data.table’:
shift
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
The following object is masked from ‘package:purrr’:
reduce
Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Loading required package: DelayedArray
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
The following object is masked from ‘package:dplyr’:
count
Loading required package: BiocParallel
Attaching package: ‘DelayedArray’
The following objects are masked from ‘package:matrixStats’:
colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
The following object is masked from ‘package:purrr’:
simplify
The following objects are masked from ‘package:base’:
aperm, apply, rowsum
Loading required package: Biostrings
Loading required package: XVector
Attaching package: ‘XVector’
The following object is masked from ‘package:purrr’:
compact
Attaching package: ‘Biostrings’
The following object is masked from ‘package:base’:
strsplit
Loading required package: bumphunter
Loading required package: locfit
locfit 1.5-9.4 2020-03-24
Attaching package: ‘locfit’
The following object is masked from ‘package:purrr’:
none
replacing previous import 'minfi::getMeth' by 'bsseq::getMeth' when loading 'DMRcate'
S4_to_dataframe <- function(s4obj) {
nms <- slotNames(s4obj)
lst <- lapply(nms, function(nm) slot(s4obj, nm))
as.data.frame(setNames(lst, nms))
}
run_dmrcate <- function(prs_mat,cov_mat,mvalues){
design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
prs_annotated <- cpg.annotate(datatype = "array", object = mvalues, analysis.type = "differential", design = design_prs, coef = 2, what = "M", arraytype = "EPIC", fdr = 0.05)
prs_dmr_res <- dmrcate(prs_annotated, lambda = 1000, C = 2)
return(S4_to_dataframe(prs_dmr_res))
}
dmr_cross <- run_dmrcate(prs_mat, cov_mat, mvalues)
Loading required package: IlluminaHumanMethylationEPICanno.ilm10b4.hg19
Your contrast returned 85 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
dmr_males <- run_dmrcate(prs_mat_male,cov_mat_male,mvalues_male)
Your contrast returned 59 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
dmr_female <- run_dmrcate(prs_mat_female,cov_mat_female,mvalues_female)
Your contrast returned 41 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
annotation <- minfi::getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
Plotting all DMRs
get_dmr_effects <- function(dmr, limma_res,mvalues) {
dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
cpgs <- as.data.table(annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ])
res <- limma_res[cpgs,on=c("ID"="Name"),nomatch=0 ]
dmr_1 <- as.data.frame(res[!is.na(res$logFC), ])
dmr_methy <- reshape2::melt(lumi::m2beta(mvalues[dmr_1$ID, ]), stringsAsFactors = FALSE)
to_plot <- merge(dmr_1, dmr_methy, by.x = "ID", by.y = "Var1")
to_plot$DMR <- dmr
to_plot
}
get_dmr_res <- function(dmr, limma_res) {
dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
cpgs <- annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ]
res <- cbind(cpgs, limma_res[cpgs$Name, ])
res$DMR <- dmr
return(res)
}
plot_dmrs <- function(dmr_res,limma_res,prs_mat, mvalues, case_control) {
to_plot <- rbindlist(lapply(dmr_res$coord, function(dmr) get_dmr_effects(dmr, limma_res,mvalues))) %>%
mutate(SCORE1_AVG = scale(prs_mat[match(Var2,IDs$IID),`Pt_5e-08`]),PD = PD[match(Var2,IID)]$PD) %>%
filter(!is.na(SCORE1_AVG))
for(cur_dmr in unique(to_plot$DMR)){
cur_plot <- to_plot[DMR == cur_dmr]
p1 <- ggplot(cur_plot , aes(pos, value, color = SCORE1_AVG)) +
geom_point() +
scale_color_gradient(low = rev(case_control)[1], high = rev(case_control)[2]) +
theme_minimal() +
labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "Normalized PD PRS")+
theme(axis.text.x = element_text(angle = 90))
p2 <- ggplot(cur_plot %>% mutate(PD = ifelse(PD == 1, "CASE", "CONTROL")), aes(factor(pos), value, color = PD)) +
geom_boxplot(position = position_dodge(0.75)) +
scale_color_manual(values = case_control) +
theme_minimal() +
stat_summary(aes(group = PD), fun = mean, geom = "line") +
labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "PD status") +
theme(axis.text.x = element_text(angle = 90))
print(p1)
print(p2)
}
}
plot_dmrs(dmr_cross, prs_annot, prs_mat,mvalues, rev(c("gray80", "gray40")))




















plot_dmrs(dmr_males,prs_annot_male,prs_mat_male, mvalues_male,rev(c("light blue", "lightblue4")))




















plot_dmrs(dmr_female, prs_annot_female, prs_mat_female, mvalues_female,rev(c("lightpink", "lightpink4")))












dmr_cross
dmr_males
dmr_female
save(list=c("dmr_cross","dmr_males","dmr_female"),file="prs_dmr_nalls.RData")
---
title: "PRSice2 Development of risk score"
output:
  html_notebook:
    toc: yes
---

```{r setup, include=FALSE}
library(tidyverse)
library(ggrepel)
library(data.table)
library(knitr)
library(limma)
library(foreach)
library(doParallel)
Sys.setlocale("LC_MESSAGES", "en_US.utf8")
knitr::opts_chunk$set(echo = TRUE)
```

# Step 0: Prepare covariates and input files
```{r}
IDs <- fread("~/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call.fam")[, .(FID = V1, IID = V2)]
prsice_cov <- fread("prsice_cov_and_status_mvalues.txt")
prsice_cov <- prsice_cov[match(IDs$IID,prsice_cov$IID)]
all(IDs$IID ==prsice_cov$IID)
covariate <- cbind(IDs, prsice_cov[,-c(16,17,18)])

PD <- cbind(IDs,prsice_cov[,16])

head(covariate)
head(PD)
fwrite(na.omit(PD), "TERRE.pheno", sep = "\t")
fwrite(na.omit(covariate), "TERRE.covariate", sep = "\t")
```
```{r}
covariate
```


# Step 1: Run PRSice-2 on Nalls et al 2019 Sumstats
```{bash,eval=FALSE}
Rscript /home1/NEURO/casazza/PRSice.R \
    --prsice /home1/NEURO/casazza/PRSice_linux\
    --base /home1/NEURO/casazza/nalls_PD.QC.gz\
    --base-info INFO:0.8 \
    --base-maf MAF:0.01 \
    --cov TERRE.covariate \
    --binary-target T\
    --beta  \
    --ld /home1/NEURO/casazza/1000G_plink/EUR_phase3  \
    --out TERRE_PRSice \
    -q 5\
    --all-score\
    --pheno TERRE.pheno \
    --snp SNP \
    --stat b \
    --pvalue p\
    --target /home1/NEURO/casazza/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call \
    --thread 32
```
# Step 2: Evaluate output
```{r, out.width="400px"}
include_graphics("prsice_images/TERRE_PRSice_BARPLOT_2022-06-30.png")
include_graphics("prsice_images/TERRE_PRSice_HIGH-RES_PLOT_2022-06-30.png")
include_graphics("prsice_images/TERRE_PRSice_QUANTILES_PLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_BARPLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_male_HIGH-RES_PLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_male_QUANTILES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_BARPLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_female_HIGH-RES_PLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_female_QUANTILES_PLOT_2022-10-04.png")
```
## Plotting PRSice Data on my own
```{r}
library(ggnewscale)
prsice_male_meta <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.prsice")
ggplot(prsice_male_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()

prsice_female_meta <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.prsice")
ggplot(prsice_female_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()

prsice_meta <- fread("prsice_data/TERRE_PRSice.prsice")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink") +
  labs(color = bquote("Female log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue") +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Male -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink",guide = guide_colorbar(order=3)) +
  labs(color = bquote("Female -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue",guide = guide_colorbar(order=2)) +
  labs(color = bquote("Male -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "gray40", high = "gray80",guide = guide_colorbar(order=1)) +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Cross-sex -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top",legend.title = element_text(size=7))
```

# Step 3 run linear model at different thresholds for SNP inclusion
This is updated to recently processed DNAm Data 20 outliers in DNAm removed:
```{r}
load("/home1/NEURO/SHARE_DECIPHER/processed_DNAm_data/2022/TERRE_processed_2022/1-TERRE_RG_filtered.RData") #PD_RG_filtered
# Assign genotyping ID to data
original_covars <- fread("/home1/NEURO/SHARE_DECIPHER/terre_meta_master.csv")[, .(patient, IID = gsub("_PAE.*", "", IID))]
betas_combat <- minfi::getBeta(PD_RG_filtered)
colnames(betas_combat) <- original_covars$IID[match(colnames(betas_combat), original_covars$patient)]
betas_combat <- betas_combat[, colnames(betas_combat) %in% covariate$IID]
```
Let's check how the data looks for the first 5 subjects:
```{r}
ggplot(betas_combat[, 1:5] %>% as.data.table(keep.rownames = T) %>% melt(id.vars = "rn", value.name = "betas", variable.name = "subject"), aes(betas, color = subject))+
  geom_density()
```

### Match DNA, PRS, and metadata @TODO fix this after update of PRS etc.
```{r}
prsice_best <- fread("prsice_data/TERRE_PRSice.best")[match(colnames(betas_combat), IID,nomatch=0),.(best=PRS)]
prsice_all <- cbind(
  fread("prsice_data/TERRE_PRSice.all_score")[match(colnames(betas_combat), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_best
)
covariate <- covariate[match(colnames(betas_combat), IID)]
all(covariate$IID == colnames(betas_combat))
all(covariate$IID == prsice_all$IID)

covariate_male <- covariate[sex == 1] %>% select(-sex)
betas_male <- betas_combat[, covariate_male$IID]
prsice_male_best <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.best")[match(colnames(betas_male), IID,nomatch=0),.(best=PRS)]
prsice_male_all <- cbind(
  fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.all_score")[match(colnames(betas_male), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_male_best
)
covariate_male <- covariate_male[match(colnames(betas_male), IID,nomatch=0)]
all(covariate_male$IID == colnames(betas_male))
all(covariate_male$IID == prsice_male_all$IID)

prsice_female_best <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.best")
covariate_female <- covariate[sex == 0] %>% select(-sex) %>% filter(IID %in% prsice_female_best$IID)
betas_female <- betas_combat[, covariate_female$IID]
prsice_female_best <- prsice_female_best[match(colnames(betas_female), IID,nomatch=0),.(best=PRS)]
prsice_female_all <- cbind(
  fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.all_score")[match(colnames(betas_female), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_female_best
)
covariate_female <- covariate_female[match(colnames(betas_female), IID,nomatch=0)]
all(covariate_female$IID == colnames(betas_female))
all(covariate_female$IID == prsice_female_all$IID)


```
### Run limma

```{r}
mvalues <- lumi::beta2m(betas_combat)
prs_mat <- prsice_all[, -c(1, 2)]
cov_mat <- covariate[, -c(1, 2)]

mvalues_male <- lumi::beta2m(betas_male)
prs_mat_male <- prsice_male_all[, -c(1, 2)]
cov_mat_male <- covariate_male[, -c(1, 2)]

mvalues_female <- lumi::beta2m(betas_female)
prs_mat_female <- prsice_female_all[, -c(1, 2)]
cov_mat_female <- covariate_female[, -c(1, 2)]
```

```{r}
registerDoParallel(ncol(prs_mat) / 4)
hits <- foreach(prs_thresh = colnames(prs_mat)) %dopar% {
  design_prs <- model.matrix(~., data = cbind(prs_mat[, ..prs_thresh], cov_mat))
  prs_fit <- lmFit(mvalues, design_prs)
  prs_fit <- eBayes(prs_fit)
  topTable(prs_fit, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues))
}
names(hits) <- colnames(prs_mat)
hits_by_thresh_bonf <- rbindlist(hits, idcol = "threshold", fill = TRUE)

registerDoParallel(ncol(prs_mat_male) / 4)
hits_male <- foreach(prs_thresh = colnames(prs_mat_male)) %dopar% {
  design_prs_male <- model.matrix(~., data = cbind(prs_mat_male[, ..prs_thresh], cov_mat_male))
  prs_fit_male <- lmFit(mvalues_male, design_prs_male)
  prs_fit_male <- eBayes(prs_fit_male)
  topTable(prs_fit_male, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues_male))
}
names(hits_male) <- colnames(prs_mat_male)
hits_by_thresh_bonf_male <- rbindlist(hits_male, idcol = "threshold", fill = TRUE)

registerDoParallel(ncol(prs_mat_female) / 4)
hits_female <- foreach(prs_thresh = colnames(prs_mat_female)) %dopar% {
  design_prs_female <- model.matrix(~., data = cbind(prs_mat_female[, ..prs_thresh], cov_mat_female))
  prs_fit_female <- lmFit(mvalues_female, design_prs_female)
  prs_fit_female <- eBayes(prs_fit_female)
  topTable(prs_fit_female, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues_female))
}
names(hits_female) <- colnames(prs_mat_female)
hits_by_thresh_bonf_female <- rbindlist(hits_female, idcol = "threshold", fill = TRUE)

```

### Plotting EWAS vs Threshold Experiment by Sex
```{r}
to_plot <- rbind(
  hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_male[, .(hits = .N, Sex = "Male"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_female[, .(hits = .N, Sex = "Female"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0"))
) %>% mutate(Sex = factor(Sex, levels = c("Cross-sex", "Male", "Female")))
plot_pos <- position_dodge(width = 1)
ggplot(to_plot, aes(threshold, hits, fill = Sex, label = hits)) +
  geom_text(position = plot_pos, vjust = -0.25) +
  geom_col(position = plot_pos) +
  labs(x = "GWAS P Value Threshold", y = "EWAS Hits") +
  scale_fill_manual(values = c("grey80", "lightblue", "lightpink")) +
  theme_minimal()
hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold]
hits_by_thresh_bonf_male[, .(hits = .N), by = threshold]
hits_by_thresh_bonf_female[, .(hits = .N), by = threshold]
```
```{r}
display_venn <- function(x, ...) {
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

display_venn(list(`Cross-sex` = hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID, Male = hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID, Female = hits_by_thresh_bonf_female[threshold == "Pt_5e-08"]$ID), fill = c("gray80", "lightblue", "lightpink"))
male_ids <- hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID
cross_ids <- hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID
male_ids[!male_ids %in% cross_ids]
```

```{r}
get_full_fit <- function(prs_mat,cov_mat,mvalues){
  top_design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  top_prs_fit <- lmFit(mvalues, top_design_prs)
  top_prs_fit <- eBayes(top_prs_fit)
  top_prs_hits <- topTable(top_prs_fit, coef = 2, adjust.method = "bonf", number = Inf, genelist = rownames(mvalues))
}
top_prs_hits <- get_full_fit(prs_mat,cov_mat,mvalues)
top_male_prs_hits <- get_full_fit(prs_mat_male, cov_mat_male, mvalues_male)
top_female_prs_hits <- get_full_fit(prs_mat_female, cov_mat_female, mvalues_female)
save(list=c("top_prs_hits","top_male_prs_hits","top_female_prs_hits"),file="prs_nalls_cross_w_sex_stratified.RData")
```

```{r}
load("prs_nalls_cross_w_sex_stratified.RData")
```
```{r}
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
prs_annot <- data.table(top_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_male <- data.table(top_male_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_female<- data.table(top_female_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
plot_prs_hits <- function(prs_annot,label_color){
  ggplot(prs_annot, aes(logFC, -log10(P.Value))) +
    geom_point() +
    geom_point(
      data = subset(prs_annot, adj.P.Val < 0.05 & abs(logFC) > 0.03),
      color = label_color,
      mapping = aes(logFC, -log10(P.Value))
    ) +
    geom_hline(
      linetype = "dashed",
      yintercept = min(-log10(prs_annot$P.Value[prs_annot$adj.P.Val < 0.05]))
    ) +
    geom_vline(linetype = "dashed", xintercept = 0.03) +
    geom_vline(linetype = "dashed", xintercept = -0.03) +
    geom_text_repel(
      data = prs_annot %>% filter(abs(logFC) > 0.03 & adj.P.Val < 0.05),
      color = "dodgerblue",
      mapping = aes(logFC, -log10(P.Value), label = ifelse(gene != "", gene, ID)),
      size = 3,
      max.overlaps = 20
    ) +
    labs(y = bquote("log"[10] ~ "(P)"), x = quote(Delta ~ "M" ~ Methylation)) +
    theme_minimal()
}

plot_prs_hits(prs_annot,"gray40")
plot_prs_hits(prs_annot_male,"lightblue")
plot_prs_hits(prs_annot_female,"pink")
```


```{r}
prs_plot_data <- data.table(dnam = lumi::m2beta(mvalues["cg12609785",]), prs = prs_mat$`Pt_5e-08`,PD=ifelse(PD[match(colnames(mvalues),IID),]$PD == 1,"Case","Control"),Sex=factor(ifelse(cov_mat$sex ==1,"Male","Female"),levels=c("Male","Female")))
ggplot(prs_plot_data,aes(prs,dnam,color=Sex,shape = PD,group=Sex)) +
  geom_point(size=3)+
  geom_smooth(method="lm",se=F)+
  labs(y=bquote("Methylation"~beta),x="Parkinson's GRS")+
  scale_color_manual(values=c("Male"="lightblue","Female"="lightpink")) + 
  theme_minimal(base_size=20)
```

## go enrichment?
```{r,eval=FALSE}
library(gprofiler2)
cross_sex <- unique(gsub(";.*","",manifest[manifest$name %in% hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID,]$UCSC_RefGene_Name))
background <- unique(gsub(";.*","",manifest$UCSC_RefGene_Name))
gost_res <- gost(query=cross_sex,custom_bg = background)
gostplot(gost_res)
```
```{r}
cpg_pos <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Locations %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
male_annot_cpg <- (prs_annot_male %>% filter(adj.P.Val < 0.25) %>% left_join(cpg_pos,by=c("ID"="name")) %>% group_by(chr) %>% top_n(-10,P.Value))$ID
female_annot_cpg <- (prs_annot_female %>% filter(adj.P.Val < 0.25) %>% left_join(cpg_pos,by=c("ID"="name")) %>% group_by(chr) %>% top_n(-10,P.Value))$ID
library(hudson)
options(ggrepel.max.overlaps = Inf)
gmirror(
  prs_annot_male[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  prs_annot_female[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  annotate_snp = c(male_annot_cpg,female_annot_cpg),
  tline =  max(prs_annot_male[adj.P.Val<0.25]$P.Value),
  bline =  max(prs_annot_female[adj.P.Val<0.25]$P.Value),
  highlight_p = c(max(prs_annot_male[adj.P.Val<0.25]$P.Value),max(prs_annot_female[adj.P.Val<0.25]$P.Value)),
  toptitle="Male",
  bottomtitle ="Female",
  highlighter="green",
  background="white"
)

```

# Manhattan plot vs GWAS manhattan plot
```{r,eval=FALSE}
library(qqman)

copy_annot <- prs_annot[cpg_pos, on = c(ID = "name")] %>%
  mutate(chr = as.numeric(recode(gsub("chr", "", chr),X="23",Y="24")))
to_plot <- copy_annot[, .(SNP = gene, CHR = chr, BP = pos, P = P.Value, FDR = adj.P.Val)][!is.na(P)]
manhattan(to_plot,
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = TRUE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
manhattan(to_plot[CHR == 17],
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = FALSE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
qq(to_plot$P)
```


## DMRs

```{r}
library(DMRcate)
S4_to_dataframe <- function(s4obj) {
  nms <- slotNames(s4obj)
  lst <- lapply(nms, function(nm) slot(s4obj, nm))
  as.data.frame(setNames(lst, nms))
}
run_dmrcate <- function(prs_mat,cov_mat,mvalues){
  design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  prs_annotated <- cpg.annotate(datatype = "array", object = mvalues, analysis.type = "differential", design = design_prs, coef = 2, what = "M", arraytype = "EPIC", fdr = 0.05)
  prs_dmr_res <- dmrcate(prs_annotated, lambda = 1000, C = 2)
  return(S4_to_dataframe(prs_dmr_res))
}
dmr_cross <- run_dmrcate(prs_mat, cov_mat, mvalues)
dmr_males <- run_dmrcate(prs_mat_male,cov_mat_male,mvalues_male)
dmr_female <- run_dmrcate(prs_mat_female,cov_mat_female,mvalues_female)
```
```{r}
annotation <- minfi::getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
```
### Plotting all DMRs {.tabset .tabset-fade}
```{r}
get_dmr_effects <- function(dmr, limma_res,mvalues) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- as.data.table(annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ])
  res <- limma_res[cpgs,on=c("ID"="Name"),nomatch=0 ]
  dmr_1 <- as.data.frame(res[!is.na(res$logFC), ])
  dmr_methy <- reshape2::melt(lumi::m2beta(mvalues[dmr_1$ID, ]), stringsAsFactors = FALSE)
  to_plot <- merge(dmr_1, dmr_methy, by.x = "ID", by.y = "Var1")
  to_plot$DMR <- dmr
  to_plot
}
get_dmr_res <- function(dmr, limma_res) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ]
  res <- cbind(cpgs, limma_res[cpgs$Name, ])
  res$DMR <- dmr
  return(res)
}
plot_dmrs <- function(dmr_res,limma_res,prs_mat, mvalues, case_control) {
  to_plot <- rbindlist(lapply(dmr_res$coord, function(dmr) get_dmr_effects(dmr, limma_res,mvalues))) %>%
    mutate(SCORE1_AVG = scale(prs_mat[match(Var2,IDs$IID),`Pt_5e-08`]),PD = PD[match(Var2,IID)]$PD) %>%
    filter(!is.na(SCORE1_AVG))
  for(cur_dmr in unique(to_plot$DMR)){
    cur_plot <- to_plot[DMR == cur_dmr]
    p1 <- ggplot(cur_plot , aes(pos, value, color = SCORE1_AVG)) +
      geom_point() +
      scale_color_gradient(low = rev(case_control)[1], high = rev(case_control)[2]) +
      theme_minimal() +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "Normalized PD PRS")+
      theme(axis.text.x = element_text(angle = 90))
    p2 <- ggplot(cur_plot %>% mutate(PD = ifelse(PD == 1, "CASE", "CONTROL")), aes(factor(pos), value, color = PD)) +
      geom_boxplot(position = position_dodge(0.75)) +
      scale_color_manual(values = case_control) +
      theme_minimal() +
      stat_summary(aes(group = PD), fun = mean, geom = "line") +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "PD status") +
      theme(axis.text.x = element_text(angle = 90))
    print(p1)
    print(p2)
  }
}

plot_dmrs(dmr_cross, prs_annot, prs_mat,mvalues, rev(c("gray80", "gray40")))
plot_dmrs(dmr_males,prs_annot_male,prs_mat_male, mvalues_male,rev(c("light blue", "lightblue4")))
plot_dmrs(dmr_female, prs_annot_female, prs_mat_female, mvalues_female,rev(c("lightpink", "lightpink4")))
```


```{r}
dmr_cross
dmr_males
dmr_female
save(list=c("dmr_cross","dmr_males","dmr_female"),file="prs_dmr_nalls.RData")
```
